endowment_data <- read_rds("./data/endowment_filter_data_990.RDS") %>%
select(EIN, fiscal_year, contains("CY"))
companies_to_ein <- read_csv("./data/companies.csv") %>%
mutate(EIN = as.character(ein)) %>%
select(EIN, organization_name)
# only include EINs that have at least one observation of
# one of the endowment variables
include_eins <- endowment_data %>%
pivot_longer(-c(EIN,fiscal_year)) %>%
group_by(EIN) %>%
summarize(na_count = sum(is.na(value)),
total_rows = n()) %>%
filter(na_count < total_rows) %>%
pull(EIN) %>% unique()
endowment_data <- endowment_data %>%
filter(EIN %in% include_eins) %>%
group_by(EIN) %>%
pivot_longer(3: ncol(.),
names_to = "variable_name") %>%
mutate(source = ifelse(grepl("CYM", variable_name),
substr(variable_name, 1,4), "CY"),
year_lag = ifelse(grepl("CYM", variable_name),
substr(source, 4,4), 0),
year_lag = as.numeric(year_lag),
fiscal_year = as.numeric(paste0(fiscal_year)),
variable_name = gsub("CY|CYM.", "", variable_name)) %>%
mutate(value_year = fiscal_year -year_lag
) %>%
group_by(EIN, value_year, variable_name) %>%
arrange(EIN, variable_name, fiscal_year) %>%
# pick the most recent one available
slice_max(n = 1, order_by = fiscal_year) %>%
select(EIN, value_year, variable_name, source, value) %>%
rename(fiscal_year=value_year) %>%
ungroup() %>%
left_join(companies_to_ein)
plot_variable <- function(var) {
# get eins with at least one observation of the variable
eins_with_variable <- endowment_data %>%
filter(variable_name == var) %>%
group_by(EIN) %>%
summarize(number_observations = sum(!is.na(value))) %>%
filter(number_observations != 0) %>%
pull(EIN)
data <- endowment_data %>%
filter(EIN %in% eins_with_variable & variable_name == var) %>%
group_by(EIN) %>%
mutate(EIN_mean = mean(value,na.rm= TRUE)) %>%
ungroup() %>%
group_by(variable_name) %>%
mutate(quantile_group = ntile(EIN_mean, n = 4)) %>%
group_by(EIN) %>%
# make sure EIN has single quantile group
mutate(quantile_group = max(quantile_group)) %>%
mutate(quantile_group_labels = factor(paste0("Quantile ", quantile_group))) %>%
mutate(organization_name = ifelse(is.na(organization_name),
"Not Available",
organization_name)) %>%
ungroup()
data_no_nas <- data %>% filter(!is.na(value))
data %>%
ggplot(aes(x = fiscal_year, y = value, color = EIN, label = organization_name)) +
geom_line(data = data_no_nas,
aes(
x = fiscal_year,
y = value, group = EIN),
color = "darkgray",
linetype = "dotted") +
geom_point(size = .6) +
geom_line() +
facet_wrap(~fct_reorder(
quantile_group_labels,
.x = quantile_group), scales="free_y", ncol = 2) +
scale_y_continuous(labels = comma) +
viridis::scale_color_viridis(option = "mako", discrete = TRUE, end=.94) +
theme_bw() +
labs(title = paste0("Change in ", var, " Over Time"),
x = "Fiscal Year") +
theme(plot.title = element_text(size = 18,
hjust = .5, face="bold",
margin =margin(.1,.1,10,.1)),
plot.subtitle = element_text(hjust = .5, face="italic"),
axis.text.x = element_text(size = 13),
axis.title = element_text(size = 13, face = "bold"),
legend.position = "none")
}
Interactive Plots
m <- list(
l = 50,
r = 50,
b = 100,
t = 150,
pad = 0.5
)
# https://github.com/plotly/plotly.R/issues/570
plotlist <- map(unique(endowment_data$variable_name),
~{plt <- plot_variable(.x)
plt <- ggplotly(plt) %>% layout(height = 450,
margin =m)
}
)
htmltools::tagList(setNames(plotlist, NULL))